Data

library(readr)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(DT)
## Warning: package 'DT' was built under R version 4.4.3
# Load the listings export and standardise the column names with
# janitor::clean_names().
# NOTE(review): the path below is absolute and specific to one machine; the
# report will not knit elsewhere unless the file lives at this exact location.
applications <- clean_names(
  read_csv(
    "C:/Users/danie/OneDrive/Desktop/DATA 824-Data Visualization/applications.csv"
  )
)
## Rows: 95588 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): headline, item_condition
## dbl  (7): app_id, price_gel, price_usd, status_id, category_id, vehicle_type...
## dttm (1): insert_date
## date (1): app_register_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Structural overview: one line per column with its type and first values.
applications %>%
  glimpse()
## Rows: 95,588
## Columns: 11
## $ app_id            <dbl> 10074150, 10101861, 10101874, 10107865, 10107874, 10…
## $ headline          <chr> "SUZUKI Grand Vitara 1996", "FORD / MERCURY / LINCOL…
## $ price_gel         <dbl> 25, 15, 35, 100, 30, 160, 180, 30, 80, 80, 100, 160,…
## $ price_usd         <dbl> 9, 5, 13, 36, 11, 58, 66, 11, 29, 29, 36, 58, 11, 55…
## $ app_register_date <date> 2024-09-18, 2024-09-18, 2024-09-18, 2024-09-18, 202…
## $ status_id         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ category_id       <dbl> 531, 531, 531, 334, 531, 334, 334, 531, 567, 567, 33…
## $ vehicle_type_id   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ seller_id         <dbl> 581148, 581148, 581148, 581148, 581148, 581148, 5811…
## $ item_condition    <chr> "New", "New", "New", "New", "New", "New", "New", "Ne…
## $ insert_date       <dttm> 2024-10-08 13:14:11, 2024-10-08 13:14:22, 2024-10-0…
# Per-column summary statistics for the whole data set.
applications %>%
  summary()
##      app_id           headline           price_gel        price_usd    
##  Min.   :10074150   Length:95588       Min.   :   0.0   Min.   :  0.0  
##  1st Qu.:12106576   Class :character   1st Qu.:  37.0   1st Qu.: 29.0  
##  Median :12369408   Mode  :character   Median : 100.0   Median : 95.0  
##  Mean   :12215443                      Mean   : 179.6   Mean   :163.6  
##  3rd Qu.:12487635                      3rd Qu.: 250.0   3rd Qu.:220.0  
##  Max.   :12567606                      Max.   :1000.0   Max.   :999.0  
##  app_register_date      status_id      category_id  vehicle_type_id
##  Min.   :2021-06-09   Min.   :1.000   Min.   :  2   Min.   :1.000  
##  1st Qu.:2024-09-17   1st Qu.:1.000   1st Qu.:160   1st Qu.:1.000  
##  Median :2024-09-27   Median :1.000   Median :302   Median :1.000  
##  Mean   :2024-09-24   Mean   :1.006   Mean   :304   Mean   :1.004  
##  3rd Qu.:2024-10-02   3rd Qu.:1.000   3rd Qu.:452   3rd Qu.:1.000  
##  Max.   :2024-10-07   Max.   :4.000   Max.   :665   Max.   :3.000  
##    seller_id       item_condition      insert_date                    
##  Min.   :     79   Length:95588       Min.   :2024-10-08 13:14:11.00  
##  1st Qu.: 770839   Class :character   1st Qu.:2024-10-20 08:16:07.50  
##  Median :3020459   Mode  :character   Median :2024-10-25 12:51:06.00  
##  Mean   :2459079                      Mean   :2024-10-23 16:27:22.85  
##  3rd Qu.:4006239                      3rd Qu.:2024-10-28 18:24:05.75  
##  Max.   :5502015                      Max.   :2024-11-02 13:29:42.00

Price Distribution (USD)

# Histogram of listing prices in USD (50 bins) with a dollar-formatted x axis.
applications %>%
  ggplot(aes(price_usd)) +
  geom_histogram(bins = 50, fill = "steelblue") +
  scale_x_continuous(labels = dollar_format()) +
  labs(
    x = "Price (USD)",
    y = "Number of Listings",
    title = "Distribution of Part Prices (USD)"
  ) +
  theme_minimal()

New vs Used Items

# Bar chart of the number of listings per item condition; the counts are
# computed up front with count() and drawn as-is with geom_col().
ggplot(
  count(applications, item_condition),
  aes(item_condition, n, fill = item_condition)
) +
  geom_col() +
  labs(
    x = "Condition",
    y = "Listings",
    title = "Item Condition Breakdown"
  ) +
  theme_minimal()

Listings Over Time

# Monthly listing counts: bucket each registration date to the start of its
# month, count the rows per month, and draw the counts as a line.
applications %>%
  mutate(month = floor_date(app_register_date, "month")) %>%
  count(month) %>%
  ggplot(aes(x = month, y = n)) +
  # `linewidth` replaces `size` for line geoms; `size` was deprecated for
  # lines in ggplot2 3.4.0 and emitted a lifecycle warning here.
  geom_line(color = "darkgreen", linewidth = 1.2) +
  labs(
    title = "Listings Over Time",
    x = "Month",
    y = "Number of Listings"
  ) +
  theme_minimal()

Interactive Table (Top Listings by Date)

# Interactive table of the most recently registered listings, 10 per page.
# Only the newest 1,000 rows are sent to the browser: passing all 95,588 rows
# to client-side DataTables triggered DT's "data is too big" warning, and the
# server-side alternative needs a running Shiny session (assumes this report
# is knitted to a static document -- confirm).
applications %>%
  select(app_id, headline, price_usd, item_condition, app_register_date) %>%
  arrange(desc(app_register_date)) %>%
  slice_head(n = 1000) %>%
  datatable(options = list(pageLength = 10))

Summary Insights

Listings are overwhelmingly “New” parts.

About half of all listings are priced under $100 USD (the median price is $95).

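Listing activity peaked in fall 2024, possibly due to seasonal demand or promotions. Note, however, that registration dates end on 2024-10-07 and three quarters of the listings were registered on or after 2024-09-17, so the peak may partly reflect when the data was collected rather than a true seasonal pattern.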

This data can help parts managers adjust inventory and monitor market trends.